###############################################################################
# Clean Alaska's 2020 audit data
#
# written by sbaltz at mit,
#      november through december 2022
###############################################################################
import pandas as pd
import numpy as np
import itertools as it
import copy
import os


###############################################################################
# Global variables
###############################################################################
# Number of column names supplied when each transcribed CSV is read; the
# columns are labelled '0' through str(MAXWIDTH - 1).
MAXWIDTH = 22

# Maps each office title, exactly as it appears in the transcribed sheets, to
# its candidates in the order their (original, audited) column pairs appear.
# GetOffice() reads the i-th candidate's counts from the i-th column pair, so
# the order of every list is significant and every entry must be its own
# comma-separated string.
OFFICE_ORDER = {
    'U.S. President / Vice President': [
        'JOSEPH R BIDEN (DEM)', 'DON BLANKENSHIP (CON)',
        'ROCQUE DE LA FUENTE (ALI)', 'JESSE VENTURA (GRN)',
        'JO JORGENSEN (LIB)', 'BROCK PIERCE (NOM)', 'DONALD J TRUMP (REP)',
        'WRITE-IN',
    ],
    'U.S. Senator': [
        'AL GROSS (DEM)', 'JOHN WAYNE HOWE (AIP)', 'DAN SULLIVAN (REP)',
        'WRITE-IN',
    ],
    'U.S. Representative': [
        'ALYSE S GALVIN (DEM)', 'DON YOUNG (REP)', 'WRITE-IN',
    ],
    'Senate District B': [
        'EVAN A EADS (NOM)', 'ROBERT H MYERS JR (REP)',
        'MARNA L SANFORD (NOM)', 'WRITE-IN',
    ],
    'Senate District D': [
        'THOMAS LAMB (DEM)', 'JAMES D MAYFIELD (NOM)',
        'DAVID S WILSON (REP)', 'WRITE-IN',
    ],
    'Senate District F': [
        'GAVIN CHRISTIANSEN (NOM)', 'JIM COOPER (DEM)',
        'SHELLEY HUGHES (REP)', 'WRITE-IN',
    ],
    'Senate District H': [
        'MADELEINE GAISER (REP)', 'BILL WIELECHOWSKI (DEM)', 'WRITE-IN',
    ],
    'Senate District L': [
        'NATASHA A VON IMHOF (REP)', 'ROSELYNN CACY (DEM)', 'WRITE-IN',
    ],
    'Senate District M': [
        'ANDY HOLLEMAN (NOM)', 'JOSHUA C REVAK (REP)', 'WRITE-IN',
    ],
    'Senate District N': [
        'CAROLYN C CLIFT (NOM)', 'ROGER HOLLAND (REP)',
        'CARL JOHNSON (DEM)', 'WRITE-IN',
    ],
    'Senate District P': [
        'GREG MADDEN (AIP)', 'GARY STEVENS (REP)', 'WRITE-IN',
    ],
    'Senate District T': [
        'DONALD C OLSON (DEM)', 'THOMAS C BAKER (REP)', 'WRITE-IN',
    ],
    'House District 1': [
        'BARTON S LEBON (REP)', 'CHRISTOPHER QUIST (DEM)', 'WRITE-IN',
    ],
    'House District 2': [
        'JEREMIAH A YOUMANS (DEM)', 'STEVE M THOMPSON (REP)', 'WRITE-IN',
    ],
    'House District 4': [
        'GRIER H HOPKINS (DEM)', 'KEITH KURBER (REP)', 'WRITE-IN',
    ],
    'House District 5': [
        'KEVIN M MCKINLEY (REP)', 'ADAM L WOOL (DEM)', 'WRITE-IN',
    ],
    'House District 6': [
        'ELIJAH M VERHAGEN (NOM)', 'VERNON J CARLSON (NOM)',
        'MIKE CRONK (REP)', 'JULIA A HNILICKA (DEM)',
        'DEBORAH W RILEY (NOM)', 'WRITE-IN',
    ],
    'House District 7': [
        'CHRISTOPHER KURKA (REP)', 'JAMIN L BURTON (NOM)', 'WRITE-IN',
    ],
    'House District 9': [
        'GEORGE RAUSCHER (REP)', 'BILL JOHNSON (DEM)', 'WRITE-IN',
    ],
    'House District 10': [
        'DAVID EASTMAN (REP)', 'MONICA L STEIN-OLSON (DEM)', 'WRITE-IN',
    ],
    'House District 11': [
        'ANDREA L HACKBARTH (DEM)', 'DELENA JOHNSON (REP)', 'WRITE-IN',
    ],
    # A missing comma here previously fused the first two names into a
    # single string, shifting every later column pair for this office.
    'House District 13': [
        'JAMES A CANITZ SR (DEM)', 'KEN MCCARTY (REP)', 'WRITE-IN',
    ],
    'House District 14': [
        'MICHAEL W RISINGER (NOM)', 'KELLY R MERRICK (REP)', 'WRITE-IN',
    ],
    'House District 15': [
        'LYN D FRANKS (DEM)', 'DAVID NELSON (REP)', 'WRITE-IN',
    ],
    'House District 16': [
        'SCOTT A KOHLHAAS (NOM)', 'PAUL A BAUER (REP)',
        'IVY A SPOHNHOLZ (DEM)', 'WRITE-IN',
    ],
    'House District 21': [
        'LYNETTE A LARGENT (REP)', 'MATT CLAMAN (DEM)', 'WRITE-IN',
    ],
    'House District 22': [
        'SARA E RASMUSSEN (REP)', 'DAVID W NEES (AIP)',
        'STEPHEN T TRIMBLE (NOM)', 'WRITE-IN',
    ],
    'House District 23': [
        'KATHERINE J HENSLEE (REP)', 'TIMOTHY R HUIT (AIP)',
        'CHRIS S TUCK (DEM)', 'WRITE-IN',
    ],
    'House District 24': [
        'SUE LEVI (DEM)', 'THOMAS W MCKAY (REP)', 'WRITE-IN',
    ],
    'House District 25': [
        'MEL GILLIS (REP)', 'CALVIN R SCHRAGE (DEM)', 'WRITE-IN',
    ],
    'House District 27': [
        'LANCE D PRUITT (REP)', 'LIZ SNYDER (DEM)', 'WRITE-IN',
    ],
    'House District 28': [
        'SUZANNE M LAFRANCE (DEM)', 'BENJAMIN R FLETCHER (NOM)',
        'JAMES D KAUFMAN (REP)', 'WRITE-IN',
    ],
    'House District 29': [
        'BENJAMIN E CARPENTER (REP)', 'PAUL D DALE (NOM)', 'WRITE-IN',
    ],
    'House District 30': [
        'RONALD D GILLHAM (REP)', 'JAMES BAISDEN (NOM)', 'WRITE-IN',
    ],
    'House District 31': [
        'SARAH L VANCE (REP)', 'KELLY COOPER (NOM)', 'WRITE-IN',
    ],
    'House District 34': [
        'EDWARD M KING (NOM)', 'ANDREA STORY (DEM)', 'WRITE-IN',
    ],
    'House District 35': [
        'JONATHAN S KREISS-TOMKINS (DEM)', 'KENNY SKAFLESTAD (REP)',
        'WRITE-IN',
    ],
    'House District 36': [
        'DANIEL H ORTIZ (NOM)', 'LESLIE BECKER (REP)', 'WRITE-IN',
    ],
    'House District 38': [
        'WILLY KEPPEL (NOM)', 'TIFFANY ZULKOSKY (DEM)', 'WRITE-IN',
    ],
    'House District 39': [
        'NEAL W FOSTER (DEM)', 'DAN HOLMES (REP)', 'WRITE-IN',
    ],
    'House District 40': [
        'ELIZABETH FERGUSON (DEM)', 'JOSIAH PATKOTAK (NOM)', 'WRITE-IN',
    ],
    'District Court JD3 - Dickson': ['YES', 'NO'],
    'Ballot Measure No. 2 - 19AKBE': ['YES', 'NO'],
}

# Party suffix as it appears at the end of a candidate name in OFFICE_ORDER
# (leading space included) -> party label written to the output. GetOffice()
# strips the suffix from the name and records the mapped label.
PARTY_MAP = {
    ' (DEM)': 'DEMOCRAT',
    ' (REP)': 'REPUBLICAN',
    ' (NOM)': 'INDEPENDENT',
    ' (AIP)': 'ALASKAN INDEPENDENCE PARTY',
    ' (CON)': 'CONSERVATIVE',
    ' (LIB)': 'LIBERTARIAN',
    ' (GRN)': 'GREEN',
    ' (ALI)': 'ALLIANCE',
}

# When True, QuietPrint() echoes its argument and GetOffice() reports rows it
# could not parse; when False both stay silent.
VERBOSE = False


###############################################################################
# Global functions
###############################################################################
def GetOffice(row, order, rowID):
    """Turn one transcribed precinct row into per-candidate records.

    Parameters
    ----------
    row : sequence or pandas.Series
        One row of a transcribed sheet. Position 3 holds the precinct; from
        position 4 onward the cells come in (original, audited) pairs, one
        pair per candidate.
    order : list of str
        Candidate names in the order their column pairs appear, optionally
        ending in a party suffix found in PARTY_MAP.
    rowID : int
        Row identifier, used only in the verbose diagnostic message.

    Returns
    -------
    list of list, or False
        One ``[precinct, candidate, original, audited, party]`` list per
        candidate, or ``False`` if any count in the row is missing or cannot
        be converted to an integer.
    """
    startLoc = 4
    # Work on a plain list so every lookup is positional regardless of how
    # the incoming Series is labelled (integer keys on a non-integer-labelled
    # Series are a deprecated positional fallback in pandas).
    cells = list(row)
    precinct = cells[startLoc - 1]

    block = []
    for i, listed in enumerate(order):
        # Split "NAME (XXX)" into the bare name and the mapped party label;
        # names without a known suffix (e.g. WRITE-IN, YES, NO) get ''.
        candidate = listed
        party = ''
        for suffix, label in PARTY_MAP.items():
            if suffix in candidate:
                party = label
                candidate = candidate.replace(suffix, '')
        try:
            original = int(cells[startLoc + 2*i])
            audited = int(cells[startLoc + 2*i + 1])
        except (ValueError, TypeError, IndexError, OverflowError):
            # NaN, blank, non-numeric or missing cells: the row is not a
            # complete data row, so reject it as a whole.
            if VERBOSE:
                print(f"Could not parse row {rowID}")
            return False
        block.append([precinct, candidate, original, audited, party])
    return block

def QuietPrint(theString):
    """Print theString only while the module-level VERBOSE flag is set."""
    if not VERBOSE:
        return
    print(theString)


###############################################################################
# Apply
###############################################################################
# Walk every transcribed sheet, track which office the current page belongs
# to, and collect one output record per (precinct, candidate) pair.
akColumns = ['office', 'precinct', 'candidate', 'original', 'audited',
             'party', 'district']
records = []
startLoc = 4

# os.listdir() returns names in arbitrary order; sorting keeps the row order
# of the output file reproducible from run to run.
fnames = sorted(f for f in os.listdir('../transcribed/') if 'lock' not in f)
for fname in fnames:
    QuietPrint(fname)
    df = pd.read_csv('../transcribed/' + fname,
                     names=[str(i) for i in range(MAXWIDTH)])
    office = ''
    firstCol = df['0']
    nRows = len(df)
    for i in range(nRows):
        # A page header is a 'Page: ' cell, followed by the office title,
        # followed by the 'Precinct' column-header row. The bounds check
        # keeps a trailing 'Page: ' line from indexing past the sheet.
        isHeader = ('Page: ' in str(firstCol.iloc[i])
                    and i + 2 < nRows
                    and 'Precinct' in str(firstCol.iloc[i + 2]))
        if isHeader:
            rawOffice = firstCol.iloc[i + 1]
            if 'District' in rawOffice and 'Court' not in rawOffice:
                # e.g. 'House District 13' -> office 'House', district '13'.
                # Both pieces are sliced from the untouched title; slicing
                # the district from the already-shortened office always
                # produced an empty string.
                cut = rawOffice.find('District')
                office = rawOffice[:cut - 1]
                district = rawOffice[cut + len('District') + 1:]
            elif 'District' in rawOffice:
                # District Court retention question: keep the full title.
                # Stored as a string like every other district value (the
                # CSV text is the same as for the integer 3).
                office = rawOffice
                district = '3'
            else:
                office = rawOffice
                district = ''
            QuietPrint(office)
        # A non-missing value in the first 'audited' column marks a candidate
        # data row. pd.isna() also copes with non-float cells, which
        # GetOffice() then rejects if they are not numeric.
        if not pd.isna(df.iloc[i, startLoc + 1]):
            order = OFFICE_ORDER[rawOffice]
            # Hand over a plain list so GetOffice() indexes it by position.
            newBlock = GetOffice(row=df.iloc[i, :].tolist(), order=order,
                                 rowID=i)
            QuietPrint(newBlock)
            if newBlock:
                records.extend([office, *entry, district]
                               for entry in newBlock)
    QuietPrint("\n")

# Build the frame once instead of concatenating row by row, which copies the
# whole frame on every append.
ak = pd.DataFrame(records, columns=akColumns)
ak = ak.astype({'original': 'int64', 'audited': 'int64'})

#Standardize office
ak = ak.replace({'office': {'U.S. President / Vice President': 'US PRESIDENT',
                            'U.S. Senator': 'US SENATE',
                            'U.S. Representative': 'US HOUSE',
                            'House': 'STATE HOUSE',
                            'Senate': 'STATE SENATE'}})

#Other info
ak['state'] = 'ALASKA'
ak['difference'] = ak.audited - ak.original
ak['method'] = 'MANUAL'

ak.to_csv('../ready/ak.csv', index=False)

